# -*- coding: utf-8 -*-

import scrapy
import re
import json
from snowball_crawler.items import SnowballCubeItem
from snowball_crawler.spiders import snowball_spider

class snowball_daily_holding_spider( snowball_spider.snowball_base_spider ):
    '''
    Crawl daily portfolio (cube) holding information from xueqiu.com.

    Enumerates every possible cube symbol (ZH000000 .. ZH999999), fetches
    each cube page, and extracts the embedded ``SNB.cubeInfo`` JSON blob
    into a :class:`SnowballCubeItem`.
    '''
    name = "snowball_daily_holding"
    allowed_domains = ["xueqiu.com"]

    # Hook implemented for the base spider; invoked after a successful login.
    def start_spider(self):
        self.logger.debug('daily holding login done ')
        # Enumerate all cube ids ZH000000 .. ZH999999.
        # NOTE: the original used range(000000, 999999), whose exclusive stop
        # silently skipped the last symbol ZH999999 — fixed to range(1000000).
        for page in range(1000000):
            symbol = 'ZH%s' % str(page).zfill(6)
            yield scrapy.Request(url='https://www.xueqiu.com/p/%s' % symbol,
                                 meta={'symbol': symbol},
                                 headers=self.headers,
                                 callback=self.parse_cube)

    def parse_cube(self, response):
        """Extract the cube-info JSON embedded in a cube page.

        The page inlines a script of the form
        ``SNB.cubeInfo = {...};\\nSNB.cubePieData = ...``; the JSON object
        between the two markers is parsed into the item.

        Returns a SnowballCubeItem with ``symbol`` and ``cube_info`` set,
        or ``None`` when either marker is missing (e.g. the cube id does
        not exist).
        """
        html = response.body.decode()

        start_marker = 'SNB.cubeInfo = '
        pos_start = html.find(start_marker)
        pos_end = html.find('SNB.cubePieData')

        # Guard BOTH markers. The original only checked pos_end, so a page
        # missing the start marker yielded pos_start == -1 + len(marker)
        # and json.loads() raised on an arbitrary slice of the page.
        if pos_start < 0 or pos_end <= 0:
            return None
        pos_start += len(start_marker)

        item = SnowballCubeItem()
        item['symbol'] = response.meta['symbol']
        # The blob is terminated by ';\n' before the next assignment,
        # hence the -2 trim before parsing.
        item['cube_info'] = json.loads(html[pos_start:pos_end - 2])
        return item

